{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Resilient Object Streaming in AIStore\n",
    "\n",
    "This demo shows how to use `ObjectFileReader` (`aistore.sdk.obj.obj_file.object_file`) to stream large objects reliably despite unexpected stream interruptions (e.g. `ChunkedEncodingError`, `ConnectionError`) or timeouts (e.g. `ReadTimeout`) mid-read:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a5305d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 0: Import Necessary Libraries\n",
    "\n",
    "import os\n",
    "import requests\n",
    "import shutil\n",
    "import tarfile\n",
    "\n",
    "from aistore.sdk.client import Client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9d0d3c87",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 1: Initialize AIStore Client\n",
    "\n",
    "# The endpoint can be overridden through the AIS_ENDPOINT environment variable;\n",
    "# when it is unset, fall back to a local AIStore deployment.\n",
    "AIS_ENDPOINT = os.environ.get(\"AIS_ENDPOINT\", \"http://localhost:8080\")\n",
    "client = Client(AIS_ENDPOINT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa5919a0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 2: Prepare Bucket w/ Tar Data\n",
    "\n",
    "LIBRISPEECH_URL = \"https://www.openslr.org/resources/31/dev-clean-2.tar.gz\"\n",
    "DOWNLOAD_PATH = \"./dev-clean-2.tar.gz\"\n",
    "OBJECT_NAME = \"librispeech-dev-clean-2.tar.gz\"\n",
    "BUCKET_NAME = \"test-librispeech-bucket\"\n",
    "\n",
    "# Step 2a: Download the compressed tar.gz file if it doesn't already exist\n",
    "if not os.path.exists(DOWNLOAD_PATH):\n",
    "    # Write to a temporary name and rename only after the copy completes, so an\n",
    "    # interrupted download is never mistaken for a complete file on the next run.\n",
    "    partial_path = DOWNLOAD_PATH + \".part\"\n",
    "    with requests.get(LIBRISPEECH_URL, stream=True, timeout=10) as response:\n",
    "        # Fail loudly on 4xx/5xx instead of saving the error body as the archive\n",
    "        response.raise_for_status()\n",
    "        with open(partial_path, \"wb\") as f:\n",
    "            shutil.copyfileobj(response.raw, f)\n",
    "    os.replace(partial_path, DOWNLOAD_PATH)\n",
    "\n",
    "# Step 2b: Upload the tar.gz file to AIStore\n",
    "client.bucket(BUCKET_NAME).create(exist_ok=True)\n",
    "client.bucket(BUCKET_NAME).object(OBJECT_NAME).get_writer().put_file(DOWNLOAD_PATH)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f0b6fc75",
   "metadata": {},
   "source": [
    "The `ObjectFileReader` implementation catches stream interruptions such as `ChunkedEncodingError` mid-read and resumes safely by opening a new object stream from the last known position; `max_resume` sets the maximum number of resumes allowed:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "79b719d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 3: Read using ObjectFileReader (via object.get_reader().as_file())\n",
    "\n",
    "object_reader = client.bucket(BUCKET_NAME).object(OBJECT_NAME).get_reader()\n",
    "\n",
    "with object_reader.as_file(max_resume=3) as obj_file:\n",
    "    # Two consecutive reads: the first 10 bytes, then the next 10 bytes\n",
    "    first_chunk = obj_file.read(10)\n",
    "    print(first_chunk)\n",
    "    next_chunk = obj_file.read(10)\n",
    "    print(next_chunk)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bcb789bd",
   "metadata": {},
   "source": [
    "`ObjectFileReader` can be used in any context where a non-seekable, sequential file-like object is expected, such as `tarfile.open` in streaming mode `r|*`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0140aa63",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 4: Extract the Tar Archive using ObjectFileReader (streaming mode)\n",
    "\n",
    "EXTRACT_PATH = \"./librispeech_extract\"\n",
    "os.makedirs(EXTRACT_PATH, exist_ok=True)\n",
    "\n",
    "# The archive comes from the network, so use tarfile's \"data\" extraction filter\n",
    "# when this Python version provides it; it rejects members that would be written\n",
    "# outside EXTRACT_PATH. Older interpreters fall back to the default behavior.\n",
    "extract_kwargs = {\"filter\": \"data\"} if hasattr(tarfile, \"data_filter\") else {}\n",
    "\n",
    "with client.bucket(BUCKET_NAME).object(OBJECT_NAME).get_reader().as_file(max_resume=3) as f:\n",
    "    with tarfile.open(fileobj=f, mode='r|*') as tar:\n",
    "        tar.extractall(path=EXTRACT_PATH, **extract_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fb8e9cd6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 5: Clean Up\n",
    "\n",
    "client.bucket(BUCKET_NAME).delete(missing_ok=True)\n",
    "\n",
    "# Tolerate an already-removed download so this cell can be re-run safely\n",
    "if os.path.exists(DOWNLOAD_PATH):\n",
    "    os.remove(DOWNLOAD_PATH)\n",
    "\n",
    "# shutil.rmtree is portable and avoids passing an interpolated path to a shell;\n",
    "# ignore_errors=True also covers the case where the directory does not exist.\n",
    "shutil.rmtree(EXTRACT_PATH, ignore_errors=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c2ec5602",
   "metadata": {},
   "source": [
    "For more information, please refer to the [Python SDK documentation](https://github.com/NVIDIA/aistore/blob/main/docs/python_sdk.md#object_file)."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "aistore-python-3.11-env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
